How to do it:

Submission: Submit the GitHub link to your assignment on Blackboard.


  1. Write the following function. Give examples to test your function.

Hint: Similar function

# Silence all warnings for the rest of the session.
options(warn = -1)
library(tidyverse)

# Read titanic.csv into df.
df <- read_csv("titanic.csv")

# Number of missing values per column, largest first.
sort(colSums(is.na(df)), decreasing = TRUE)
##       Cabin         Age    Embarked PassengerId    Survived      Pclass 
##         687         177           2           0           0           0 
##        Name         Sex       SibSp       Parch      Ticket        Fare 
##           0           0           0           0           0           0
# Replace the missing values of a numeric vector with the mean of its
# observed values.
#
# x: a vector, e.g. one column of a data frame.
#
# Returns x with every NA replaced by mean(x, na.rm = TRUE). A vector that
# is not numeric, has no missing values, or has no observed values at all is
# returned unchanged. An integer vector that gets imputed comes back as
# double, because the mean is generally not a whole number.
mean_impute <- function(x) {
  # A mean is only defined for numeric data; leave every other type alone.
  if (!is.numeric(x) || !anyNA(x)) {
    return(x)
  }
  mean_of_x <- mean(x, na.rm = TRUE)
  # With no observed values the mean is NaN, so there is nothing to impute.
  if (is.nan(mean_of_x)) {
    return(x)
  }
  # Plain subassignment: no package has to be attached inside the function.
  x[is.na(x)] <- mean_of_x
  x
}

# Mean-impute every numeric column of a data frame.
#
# d: a data frame.
#
# Returns d with each numeric column passed through mean_impute().
# Non-numeric columns are left untouched.
numeric_impute <- function(d) {
  # seq_along() is empty for a zero-column input, whereas 1:length(d) would
  # iterate over c(1, 0) and fail on d[[1]].
  for (i in seq_along(d)) {
    # Skip non-numeric columns so no mean is ever taken of text data.
    if (is.numeric(d[[i]])) {
      d[[i]] <- mean_impute(d[[i]])
    }
  }
  d
}

# Apply numeric_impute() to df, then recount the missing values per column.
df <- numeric_impute(df)

sort(colSums(is.na(df)), decreasing = TRUE)
##       Cabin    Embarked PassengerId    Survived      Pclass        Name 
##         687           2           0           0           0           0 
##         Sex         Age       SibSp       Parch      Ticket        Fare 
##           0           0           0           0           0           0

  1. Write the following function. Give examples to test your function.

Hint: Combine the function in Problem 1 and the function in this example

# Read titanic.csv into df again.
df <- read_csv("titanic.csv")
# Replace the missing values of a numeric vector with the mean of its
# observed values.
#
# x: a vector, e.g. one column of a data frame.
#
# Returns x with every NA replaced by mean(x, na.rm = TRUE). A vector that
# is not numeric, has no missing values, or has no observed values at all is
# returned unchanged. An integer vector that gets imputed comes back as
# double, because the mean is generally not a whole number.
mean_impute <- function(x) {
  # A mean is only defined for numeric data; leave every other type alone.
  if (!is.numeric(x) || !anyNA(x)) {
    return(x)
  }
  mean_of_x <- mean(x, na.rm = TRUE)
  # With no observed values the mean is NaN, so there is nothing to impute.
  if (is.nan(mean_of_x)) {
    return(x)
  }
  # Plain subassignment: no package has to be attached inside the function.
  x[is.na(x)] <- mean_of_x
  x
}
# Replace the missing values of a non-numeric vector with its mode (the most
# frequent observed value).
#
# x: a vector, e.g. one column of a data frame.
#
# Returns x with every NA replaced by the mode. When several values share
# the highest count, the one listed first by table() is used. A numeric
# vector, a vector without missing values, or a vector with no observed
# values at all is returned unchanged.
mode_impute <- function(x) {
  if (is.numeric(x) || !anyNA(x)) {
    return(x)
  }
  counts <- table(x)
  # table() drops NA, so an all-NA vector has no mode to impute with.
  if (length(counts) == 0L) {
    return(x)
  }
  mode_label <- names(counts)[which.max(counts)]
  # Take the replacement from x itself so it keeps x's own type (character,
  # logical, factor, ...) instead of the character label from table().
  mode_value <- x[match(mode_label, as.character(x))]
  x[is.na(x)] <- mode_value
  x
}
# Impute every column of a data frame: numeric columns with their mean,
# all other columns with their mode.
#
# d: a data frame.
#
# Returns d with each numeric column passed through mean_impute() and each
# non-numeric column passed through mode_impute(). Despite its name, this
# function therefore handles non-numeric columns as well.
numeric_impute <- function(d) {
  # seq_along() is empty for a zero-column input, whereas 1:length(d) would
  # iterate over c(1, 0) and fail on d[[1]].
  for (i in seq_along(d)) {
    column <- d[[i]]
    # Dispatch on the column type so no mean is ever taken of text data.
    d[[i]] <- if (is.numeric(column)) {
      mean_impute(column)
    } else {
      mode_impute(column)
    }
  }
  d
}

# Number of missing values per column, largest first.
sort(colSums(is.na(df)), decreasing = TRUE)
##       Cabin         Age    Embarked PassengerId    Survived      Pclass 
##         687         177           2           0           0           0 
##        Name         Sex       SibSp       Parch      Ticket        Fare 
##           0           0           0           0           0           0
# Apply numeric_impute() to df, then recount the missing values per column.
df <- numeric_impute(df)
sort(colSums(is.na(df)), decreasing = TRUE)
## PassengerId    Survived      Pclass        Name         Sex         Age 
##           0           0           0           0           0           0 
##       SibSp       Parch      Ticket        Fare       Cabin    Embarked 
##           0           0           0           0           0           0

  1. Write the following function. Give examples to test your function.

Hint: Similar function

# Draw one bar chart for every non-numeric column of a data frame.
#
# d: a data frame.
#
# Each chart is printed as a side effect, with the column name as the
# x-axis label. Returns NULL invisibly.
bar_plot <- function(d) {
  column_names <- names(d)
  # seq_along() is empty for a zero-column input, whereas 1:length(d) would
  # iterate over c(1, 0) and fail on d[[1]].
  for (i in seq_along(d)) {
    if (is.numeric(d[[i]])) {
      next
    }
    column <- column_names[i]
    # Refer to the column through the .data pronoun rather than d[[i]], and
    # call ggplot2 by namespace so nothing is attached inside the function.
    chart <- ggplot2::ggplot(d, ggplot2::aes(x = .data[[column]])) +
      ggplot2::geom_bar() +
      ggplot2::labs(x = column)
    print(chart)
  }
  invisible(NULL)
}
# Example: run bar_plot() on df.
bar_plot(df)


  1. Write the following function. Give examples to test your function.

Hint: Similar function

# Draw a bar chart for every pair of non-numeric columns of a data frame.
#
# d: a data frame.
#
# For each pair, the earlier column is mapped to the x axis and the later
# column to the color aesthetic. Each chart is printed as a side effect.
# Returns NULL invisibly.
bar_plot2 <- function(d) {
  column_names <- names(d)
  categorical <- which(!vapply(d, is.numeric, logical(1)))
  # Iterating over the matching column positions is safe for inputs with
  # fewer than two columns, unlike 1:(l - 1) and (i + 1):l.
  for (i in categorical) {
    # Pair each column only with later ones so every pair is drawn once.
    for (j in categorical[categorical > i]) {
      x_name <- column_names[i]
      color_name <- column_names[j]
      # NOTE(review): for bars, color sets the outline only; fill may have
      # been intended. Confirm before changing.
      chart <- ggplot2::ggplot(
        d,
        ggplot2::aes(x = .data[[x_name]], color = .data[[color_name]])
      ) +
        ggplot2::geom_bar() +
        ggplot2::labs(x = x_name, color = color_name)
      print(chart)
    }
  }
  invisible(NULL)
}
# Example: run bar_plot2() on df.
bar_plot2(df)


  1. Write the following function. Give examples to test your function.

Hint: Combine this function, this function, and the function in Question 4. One way to combine them is to create a new function, quick_plot, and call these three functions within quick_plot.

# Draw a scatter plot for every pair of numeric columns of a data frame.
#
# d: a data frame.
#
# For each pair, the earlier column is mapped to the x axis and the later
# column to the y axis. Each plot is printed as a side effect. Returns NULL
# invisibly.
scatter_plot <- function(d) {
  column_names <- names(d)
  numeric_cols <- which(vapply(d, is.numeric, logical(1)))
  # Iterating over the matching column positions is safe for inputs with
  # fewer than two columns, unlike 1:(l - 1) and (i + 1):l.
  for (i in numeric_cols) {
    # Pair each column only with later ones so every pair is drawn once.
    for (j in numeric_cols[numeric_cols > i]) {
      x_name <- column_names[i]
      y_name <- column_names[j]
      chart <- ggplot2::ggplot(
        d,
        ggplot2::aes(x = .data[[x_name]], y = .data[[y_name]])
      ) +
        ggplot2::geom_point() +
        ggplot2::labs(x = x_name, y = y_name)
      print(chart)
    }
  }
  invisible(NULL)
}

# Draw a density plot of each numeric column, colored by each non-numeric
# column that comes after it in the data frame.
#
# d: a data frame.
#
# The numeric column is mapped to the x axis and the non-numeric column to
# the color aesthetic. Each plot is printed as a side effect. Returns NULL
# invisibly.
density_plot2 <- function(d) {
  column_names <- names(d)
  is_num <- vapply(d, is.numeric, logical(1))
  numeric_cols <- which(is_num)
  categorical <- which(!is_num)
  # Iterating over the matching column positions is safe for inputs with
  # fewer than two columns, unlike 1:(l - 1) and (i + 1):l.
  for (i in numeric_cols) {
    # Only non-numeric columns positioned after the numeric one are used.
    # NOTE(review): pairs where the non-numeric column comes first are
    # skipped by this rule. Confirm that this is intended.
    for (j in categorical[categorical > i]) {
      x_name <- column_names[i]
      color_name <- column_names[j]
      chart <- ggplot2::ggplot(
        d,
        ggplot2::aes(x = .data[[x_name]], color = .data[[color_name]])
      ) +
        ggplot2::geom_density() +
        ggplot2::labs(x = x_name, color = color_name)
      print(chart)
    }
  }
  invisible(NULL)
}

# Draw a bar chart for every pair of non-numeric columns of a data frame.
#
# d: a data frame.
#
# For each pair, the earlier column is mapped to the x axis and the later
# column to the color aesthetic. Each chart is printed as a side effect.
# Returns NULL invisibly.
bar_plot2 <- function(d) {
  column_names <- names(d)
  categorical <- which(!vapply(d, is.numeric, logical(1)))
  # Iterating over the matching column positions is safe for inputs with
  # fewer than two columns, unlike 1:(l - 1) and (i + 1):l.
  for (i in categorical) {
    # Pair each column only with later ones so every pair is drawn once.
    for (j in categorical[categorical > i]) {
      x_name <- column_names[i]
      color_name <- column_names[j]
      # NOTE(review): for bars, color sets the outline only; fill may have
      # been intended. Confirm before changing.
      chart <- ggplot2::ggplot(
        d,
        ggplot2::aes(x = .data[[x_name]], color = .data[[color_name]])
      ) +
        ggplot2::geom_bar() +
        ggplot2::labs(x = x_name, color = color_name)
      print(chart)
    }
  }
  invisible(NULL)
}
# Example: run bar_plot2() on df.
bar_plot2(df)

# Run all three pairwise plotting functions on a data frame, in this order:
# scatter_plot(), density_plot2(), bar_plot2().
#
# d: a data frame, handed unchanged to each plotting function.
quick_plot <- function(d) {
  plotters <- list(scatter_plot, density_plot2, bar_plot2)
  for (draw in plotters) {
    draw(d)
  }
}
# Example: run quick_plot() on df.
quick_plot(df)